# -*- coding: utf-8 -*-
# Copyright 2018-2019 Hylanda Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
from os.path import abspath, dirname
import jpype
from enum import Enum
import threading


class GrainSize(Enum):
    """Python-side choice of segmentation granularity.

    HylandaSegment compares against these members and translates them to
    the Java ``SegGrain`` constants of the same name.
    """
    # Translated to SegGrain.LARGE.
    LARGE = 1
    # Default of HylandaSegment.set_option(); translated to SegGrain.NORMAL
    # by HylandaSegment.change_result_grain_size().
    NORMAL = 2
    # Translated to SegGrain.SMALL.
    SMALL = 3


class Word:
    """Plain-Python copy of a single word object returned by the Java side.

    Every field is converted to a native Python type at construction time,
    so the instance holds no reference to the Java object afterwards.
    """

    def __init__(self, word_java):
        """Copy the fields of *word_java* into Python-typed attributes.

        :param word_java: object exposing the attributes ``wordStr``,
            ``userTag``, ``type``, ``natureFlag``, ``nature`` and ``weight``
        """
        self.word_str = str(word_java.wordStr)

        # A missing tag stays None rather than turning into the text "None".
        raw_tag = word_java.userTag
        self.user_tag = None if raw_tag is None else str(raw_tag)

        self.type = int(word_java.type)
        self.nature_flag = int(word_java.natureFlag)
        self.nature = int(word_java.nature)
        self.weight = float(word_java.weight)


class HylandaSegment:
    """Process-wide facade over the Hylanda Java segmenter, reached via JPype.

    All state is stored on the class and every method is static, so the
    class behaves like a singleton.  Expected call order:

    1. ``start_jvm()`` -- starts the JVM if needed and calls ``init()``;
    2. ``load_dictionary()`` -- required before any segmentation call;
    3. optionally ``set_option()``;
    4. ``segment()`` / ``seg_to_words()`` / ``get_keywords()``.

    The segmentation methods return ``None`` until ``load_dictionary()``
    has succeeded.
    """

    # Sentinel for load_dictionary(): use the user dictionary bundled with
    # this package instead of a caller-supplied path.
    BUILD_IN_USER_DICT = '__build_in_user_dict__'

    # Re-entrant because the lifecycle methods call one another while
    # already holding the lock (e.g. start_jvm() -> init()).
    _lock = threading.RLock()
    _segmentor = None   # Java BasicSegmentor instance, created by init()
    _option = None      # Java SegOption instance passed to every segment() call
    _init_ok = False    # True only after load_dictionary() succeeded

    # Java class proxies; they stay None until init() resolves them.
    Segmentor = None
    BasicSegmentor = None
    SegOption = None
    SegGrain = None
    Consts = None
    DictFlag = None

    @staticmethod
    def start_jvm(jvm_path=None, class_path=None, ext_dirs=None):
        """Start the JVM (if it is not running yet) and initialise the segmenter.

        When the JVM is already running the arguments are ignored and only
        ``init()`` is executed.

        NOTE(review): the ``java.ext.dirs`` system property is rejected by
        Java 9+ JVMs -- confirm that the target runtime is Java 8.

        :param str jvm_path: path of the JVM library; ``None`` means use
            ``jpype.getDefaultJVMPath()``
        :param str class_path: jar(s) to put on the class path; ``None`` means
            only the bundled segmenter jar.  The original notes say multiple
            entries are separated by semicolons -- NOTE(review): the JVM
            separator is platform dependent, confirm for non-Windows hosts
        :param str ext_dirs: value for ``java.ext.dirs``; ``None`` means the
            directory that contains the bundled segmenter jar
        """
        with HylandaSegment._lock:
            if not jpype.isJVMStarted():
                if jvm_path is None:
                    jvm_path = jpype.getDefaultJVMPath()
                if class_path is None:
                    class_path = HylandaSegment.get_segment_jar_path()
                if ext_dirs is None:
                    ext_dirs = HylandaSegment._get_jar_path()
                # convertStrings=False: Java strings are returned as Java
                # objects, which is why Word converts them with str().
                jpype.startJVM(
                    jvm_path,
                    "-ea",
                    f"-Djava.class.path={class_path}",
                    f"-Djava.ext.dirs={ext_dirs}",
                    convertStrings=False,
                )
            HylandaSegment.init()

    @staticmethod
    def shutdown_jvm():
        """Release the segmenter objects and shut the JVM down.

        Does nothing when the JVM is not running.
        """
        with HylandaSegment._lock:
            if jpype.isJVMStarted():
                # Drop the Java object references while the JVM is still
                # alive; this also guarantees the Python-side state is reset
                # even if shutdownJVM() itself raises.
                HylandaSegment.uninit()
                jpype.shutdownJVM()

    @staticmethod
    def init():
        """Resolve the Java classes and create the segmenter and option objects.

        Does nothing when a segmenter object already exists.  The JVM must
        already be running.  A dictionary still has to be loaded with
        ``load_dictionary()`` afterwards.

        :raises OSError: if the ``BasicSegmentor`` class could not be resolved
        """
        with HylandaSegment._lock:
            if HylandaSegment._segmentor is None:
                # Look up the Java class definitions.
                HylandaSegment.Segmentor = jpype.JClass('com.hylanda.segmentor.Segmentor')
                HylandaSegment.BasicSegmentor = jpype.JClass('com.hylanda.segmentor.BasicSegmentor')
                HylandaSegment.SegOption = jpype.JClass('com.hylanda.segmentor.common.SegOption')
                HylandaSegment.SegGrain = jpype.JClass('com.hylanda.segmentor.common.SegGrain')
                HylandaSegment.Consts = jpype.JClass('com.hylanda.segmentor.common.Consts')
                HylandaSegment.DictFlag = jpype.JClass('com.hylanda.dictionary.DictFlag')
                # NOTE(review): JClass normally raises instead of returning
                # None, so this branch is presumably defensive only.
                if HylandaSegment.BasicSegmentor is None:
                    jpype.shutdownJVM()
                    raise OSError("海量分词Java类加载失败！")
                # Create the segmenter object.
                HylandaSegment._segmentor = HylandaSegment.BasicSegmentor()
                # Create a SegOption holding the default segmentation options.
                HylandaSegment._option = HylandaSegment.SegOption()
                # No dictionary has been loaded yet.
                HylandaSegment._init_ok = False

    @staticmethod
    def uninit():
        """Drop the segmenter and option objects and mark the class as not ready."""
        with HylandaSegment._lock:
            HylandaSegment._segmentor = None
            HylandaSegment._option = None
            HylandaSegment._init_ok = False

    @staticmethod
    def is_init_ok():
        """Return True when ``init()`` has created the segmenter object.

        This does not tell whether a dictionary has been loaded; the
        segmentation methods additionally require a successful
        ``load_dictionary()``.
        """
        with HylandaSegment._lock:
            return HylandaSegment._segmentor is not None

    @staticmethod
    def load_dictionary(core_dict_path=None, user_dict_path=None):
        """Load the core dictionary and, optionally, a user dictionary.

        :param str core_dict_path: path of the core dictionary; ``None`` means
            the ``CoreDict.dat`` bundled in the package's ``dictionary`` folder
        :param str user_dict_path: path of a user-defined dictionary; ``None``
            means no user dictionary.  Pass
            ``HylandaSegment.BUILD_IN_USER_DICT`` to use the bundled
            ``UserDict.txt``
        :raises RuntimeError: if ``init()`` has not been called yet
        :raises OSError: if the Java side reports that loading failed
        """
        with HylandaSegment._lock:
            # Segmentation stays disabled unless this load succeeds.
            HylandaSegment._init_ok = False
            segmentor = HylandaSegment._require(HylandaSegment._segmentor)
            dict_dir = os.path.join(dirname(abspath(__file__)), 'dictionary')
            if core_dict_path is None:
                core_dict_path = os.path.join(dict_dir, 'CoreDict.dat')
            if user_dict_path == HylandaSegment.BUILD_IN_USER_DICT:
                user_dict_path = os.path.join(dict_dir, 'UserDict.txt')
            if not segmentor.loadDictionary(core_dict_path, user_dict_path):
                raise OSError("海量分词词典加载失败！")
            HylandaSegment._init_ok = True

    @staticmethod
    def set_option(grain_size=GrainSize.NORMAL, multi_grain_size=False, merge_org_in_n_grain_mode=False,
                   merge_mq_in_n_grain_mode=False, merge_date_time_words_in_n_grain_mode=False,
                   merge_book_title_in_n_grain_mode=False, merge_approximate_in_n_grain_mode=False,
                   just_output_word_in_keywords=False, output_delimiter=True, output_stop_word=True,
                   do_pos_tagging=False):
        """Replace the segmentation options used by subsequent calls.

        A fresh ``SegOption`` is built and fully populated before it becomes
        the active option, so callers never observe a half-configured one.

        :param GrainSize grain_size: granularity of the segmentation result
        :param bool multi_grain_size: whether multi-granularity results can be obtained
        :param bool merge_org_in_n_grain_mode: merge organisation names at normal granularity
        :param bool merge_mq_in_n_grain_mode: merge numeral-quantifier words at normal granularity
        :param bool merge_date_time_words_in_n_grain_mode: merge date/time words at normal granularity
        :param bool merge_book_title_in_n_grain_mode: merge book titles at normal granularity
        :param bool merge_approximate_in_n_grain_mode: merge approximate numbers at normal granularity
        :param bool just_output_word_in_keywords: output only words suitable as keywords
        :param bool output_delimiter: whether punctuation is output
        :param bool output_stop_word: whether stop words are output; stop words
            are defined in the user dictionary, e.g. an entry ``的<TAB>stopword``
        :param bool do_pos_tagging: whether part-of-speech tagging is performed
        :raises RuntimeError: if ``init()`` has not been called yet
        """
        with HylandaSegment._lock:
            option = HylandaSegment._require(HylandaSegment.SegOption)()
            # Set the grain explicitly for every value (including NORMAL) so
            # the result does not depend on the Java-side default.
            option.grainSize = HylandaSegment._to_java_grain(grain_size)
            option.multiGrainSize = multi_grain_size
            option.mergeOrgInNGMode = merge_org_in_n_grain_mode
            option.mergeMQInNGMode = merge_mq_in_n_grain_mode
            option.mergeDateTimeWordsInNGMode = merge_date_time_words_in_n_grain_mode
            option.mergeBookTitleInNGMode = merge_book_title_in_n_grain_mode
            option.mergeApproximateInNGMode = merge_approximate_in_n_grain_mode
            option.justOutputWordInKeywords = just_output_word_in_keywords
            option.outputDelimiter = output_delimiter
            option.outputStopWord = output_stop_word
            option.doPosTagging = do_pos_tagging
            # Publish only once every field has been filled in.
            HylandaSegment._option = option

    @staticmethod
    def segment(text):
        """Segment *text* with the current options.

        :param str text: text to segment
        :returns: the Java ``SegResult`` object, or ``None`` when no
            dictionary has been loaded successfully
        """
        if not HylandaSegment._init_ok:
            return None
        return HylandaSegment._segmentor.segment(text, HylandaSegment._option)

    @staticmethod
    def change_result_grain_size(seg_result, grain_size):
        """Derive a result of another granularity from an existing result.

        :param SegResult seg_result: result returned by ``segment()``; its own
            granularity is the one configured in the active option
        :param GrainSize grain_size: desired granularity; any value other than
            ``LARGE`` or ``SMALL`` is treated as ``NORMAL``
        :returns: a new Java ``SegResult`` object
        :raises RuntimeError: if ``init()`` has not been called yet
        """
        java_grain = HylandaSegment._to_java_grain(grain_size)
        segmentor_cls = HylandaSegment._require(HylandaSegment.Segmentor)
        return segmentor_cls.changeResultGrainSize(seg_result, java_grain)

    @staticmethod
    def seg_to_words(text):
        """Segment *text* and convert every token to a ``Word``.

        :param str text: text to segment
        :returns: list of ``Word`` objects, or ``None`` when no dictionary
            has been loaded successfully
        """
        if not HylandaSegment._init_ok:
            return None
        seg_result = HylandaSegment._segmentor.segment(text, HylandaSegment._option)
        return [Word(token) for token in seg_result.toTokenArray()]

    @staticmethod
    def get_keywords(text):
        """Segment *text* and return its keywords.

        :param str text: text to analyse
        :returns: list of ``Word`` objects, or ``None`` when no dictionary
            has been loaded successfully
        """
        if not HylandaSegment._init_ok:
            return None
        seg_result = HylandaSegment._segmentor.segment(text, HylandaSegment._option)
        return [Word(keyword) for keyword in seg_result.getKeywordsArray()]

    @staticmethod
    def get_keywords_by_seg_result(seg_result):
        """Return the keywords of an already computed segmentation result.

        :param SegResult seg_result: result returned by ``segment()``
        :returns: list of ``Word`` objects, or ``None`` when no dictionary
            has been loaded successfully
        """
        if not HylandaSegment._init_ok:
            return None
        return [Word(keyword) for keyword in seg_result.getKeywordsArray()]

    @staticmethod
    def get_segment_jar_path():
        """Return the full path of the bundled segmenter jar."""
        return os.path.join(HylandaSegment._get_jar_path(), 'segment-5.4.0.jar')

    @staticmethod
    def _get_jar_path():
        """Return the ``lib`` directory (next to this module) that holds the jar."""
        return os.path.join(dirname(abspath(__file__)), 'lib')

    @staticmethod
    def _require(obj):
        """Return *obj*, or raise RuntimeError if it is still None (init() not run)."""
        if obj is None:
            raise RuntimeError(
                "HylandaSegment is not initialized; call start_jvm() or init() first")
        return obj

    @staticmethod
    def _to_java_grain(grain_size):
        """Translate a ``GrainSize`` member to the matching Java ``SegGrain`` constant."""
        seg_grain = HylandaSegment._require(HylandaSegment.SegGrain)
        if grain_size == GrainSize.LARGE:
            return seg_grain.LARGE
        if grain_size == GrainSize.SMALL:
            return seg_grain.SMALL
        return seg_grain.NORMAL
